### Preliminary ---------------------------------------------------------------
# Load libraries
library(fixest)
library(modelsummary)
library(scales)
library(tidyverse)

# Read the state-by-decade panel used by every table and figure below.
# The path is relative, so the script must be run from the directory that
# contains the CSV.
state_df <- read_csv(file = "State by Decade Data.csv")

# Session-wide modelsummary option controlling how numeric cells are
# formatted in LaTeX output.
options(modelsummary_format_numeric_latex = "plain")

### Table 1 and Table S4: AA Population Growth and Representation --------------
# Estimation sample shared by all four models: Hawaii is excluded and only
# observations with Year >= 1990 are kept. Built once so the specifications
# cannot drift apart.
sample_1990 <- filter(state_df, StateAbb != "HI", Year >= 1990)

# Estimate two-way fixed-effects models (State and Year), adding one more
# lag of the Asian population share in each successive specification.
# Standard errors are clustered by State; this is written out explicitly so
# the code matches the table note instead of relying on the default cluster
# choice of vcov = "cluster" (the first fixed effect).
model_1 <- feols(
  PctRepsAA ~ PctAsian +
    PctDemVS + PctNWNAsian + MedianIncome + CollegeEducated |
    State + Year,
  vcov = ~State,
  data = sample_1990
)

model_2 <- feols(
  PctRepsAA ~ PctAsian +
    PctAsianLag1 +
    PctDemVS + PctNWNAsian + MedianIncome + CollegeEducated |
    State + Year,
  vcov = ~State,
  data = sample_1990
)

model_3 <- feols(
  PctRepsAA ~ PctAsian +
    PctAsianLag1 + PctAsianLag2 +
    PctDemVS + PctNWNAsian + MedianIncome + CollegeEducated |
    State + Year,
  vcov = ~State,
  data = sample_1990
)

model_4 <- feols(
  PctRepsAA ~ PctAsian +
    PctAsianLag1 + PctAsianLag2 + PctAsianLag3 +
    PctDemVS + PctNWNAsian + MedianIncome + CollegeEducated |
    State + Year,
  vcov = ~State,
  data = sample_1990
)

# Put into table form (LaTeX). coef_map both relabels and orders the rows;
# add_rows appends the sample-period and fixed-effects indicator rows.
modelsummary(
  list(
    "% Asian-American \n Representatives" = model_1,
    "% Asian-American \n Representatives" = model_2,
    "% Asian-American \n Representatives" = model_3,
    "% Asian-American \n Representatives" = model_4
  ),
  output = "latex",
  title = "Asian American Population Growth and Representation in State Legislatures",
  fmt = 3,
  stars = c("+" = 0.1, "*" = 0.05, "**" = 0.01),
  statistic = "({std.error})",
  estimate = "{estimate}{stars}",
  coef_map = c(
    "PctAsian" = "Asian Population (%)",
    "PctAsianLag1" = "Asian Population (One Decade Prior)",
    "PctAsianLag2" = "Asian Population (Two Decades Prior)",
    "PctAsianLag3" = "Asian Population (Three Decades Prior)",
    "PctDemVS" = "Democrat Presidential Vote Share",
    "PctNWNAsian" = "Nonwhite, Nonasian Population (%)",
    "MedianIncome" = "Median Income",
    "CollegeEducated" = "College Educated (%)"
  ),
  # The note lists every symbol defined in `stars`, including "+".
  note = "Standard errors clustered by state shown in parentheses. +p<0.1; *p<0.05; **p<0.01",
  gof_map = c("nobs", "adj.r.squared"),
  add_rows = data.frame(
    Term = c("Years", "Controls", "State FEs", "Decade FEs"),
    model_1 = c("1990 - 2019", "Y", "Y", "Y"),
    model_2 = c("1990 - 2019", "Y", "Y", "Y"),
    model_3 = c("1990 - 2019", "Y", "Y", "Y"),
    model_4 = c("1990 - 2019", "Y", "Y", "Y")
  )
)

### Table S5: AA Population Growth and Representation in State Legislatures -----
# Estimation sample shared by the three models: Hawaii is excluded and
# observations with Year >= 1980 are kept (one decade earlier than Table 1).
sample_1980 <- filter(state_df, StateAbb != "HI", Year >= 1980)

# Estimate two-way fixed-effects models (State and Year), adding an
# additional lag each time, up to two lags. These assignments overwrite the
# model_1..model_3 objects created for Table 1.
# Standard errors are clustered by State; this is written out explicitly so
# the code matches the table note instead of relying on the default cluster
# choice of vcov = "cluster" (the first fixed effect).
model_1 <- feols(
  PctRepsAA ~ PctAsian +
    PctDemVS + PctNWNAsian + MedianIncome + CollegeEducated |
    State + Year,
  vcov = ~State,
  data = sample_1980
)

model_2 <- feols(
  PctRepsAA ~ PctAsian +
    PctAsianLag1 +
    PctDemVS + PctNWNAsian + MedianIncome + CollegeEducated |
    State + Year,
  vcov = ~State,
  data = sample_1980
)

model_3 <- feols(
  PctRepsAA ~ PctAsian +
    PctAsianLag1 + PctAsianLag2 +
    PctDemVS + PctNWNAsian + MedianIncome + CollegeEducated |
    State + Year,
  vcov = ~State,
  data = sample_1980
)

# Put into table form (LaTeX). coef_map both relabels and orders the rows;
# it lists only terms that appear in these three models.
modelsummary(
  list(
    "% Asian-American \n Representatives" = model_1,
    "% Asian-American \n Representatives" = model_2,
    "% Asian-American \n Representatives" = model_3
  ),
  output = "latex",
  title = "Replicating Table 1 with Only Two Decade Lags and 1980s Included",
  fmt = 3,
  stars = c("+" = 0.1, "*" = 0.05, "**" = 0.01),
  statistic = "({std.error})",
  estimate = "{estimate}{stars}",
  coef_map = c(
    "PctAsian" = "Asian Population (%)",
    "PctAsianLag1" = "Asian Population (One Decade Prior)",
    "PctAsianLag2" = "Asian Population (Two Decades Prior)",
    "PctDemVS" = "Democrat Presidential Vote Share",
    "PctNWNAsian" = "Nonwhite, Nonasian Population (%)",
    "MedianIncome" = "Median Income",
    "CollegeEducated" = "College Educated (%)"
  ),
  # The note lists every symbol defined in `stars`, including "+".
  note = "Standard errors clustered by state shown in parentheses. +p<0.1; *p<0.05; **p<0.01",
  gof_map = c("nobs", "adj.r.squared"),
  add_rows = data.frame(
    Term = c("Years", "Controls", "State FEs", "Decade FEs"),
    model_1 = c("1980 - 2019", "Y", "Y", "Y"),
    model_2 = c("1980 - 2019", "Y", "Y", "Y"),
    model_3 = c("1980 - 2019", "Y", "Y", "Y")
  )
)

### Figure 2: Population and Representation Dynamics, Example States -----------
# Prepare data for graphing: keep the four example states, rebuild the
# two-period lag of the Asian population share within each state, and
# reshape to long form so each series becomes one legend entry.
graphing_data <- state_df %>%
  filter(StateAbb %in% c("CA", "WA", "NJ", "NY")) %>%
  select(State, Year, PctAsian, PctRepsAA) %>%
  group_by(State) %>%
  # order_by = Year makes the lag follow calendar order instead of whatever
  # row order the CSV happens to have. A lag of 2 rows equals two decades
  # only if there is exactly one row per state and decade -- TODO confirm.
  mutate(PctAsianLag2 = lag(PctAsian, 2, order_by = Year)) %>%
  # Drop the grouping so it does not leak into later steps.
  ungroup() %>%
  pivot_longer(
    cols = c(PctAsian, PctAsianLag2, PctRepsAA),
    values_to = "Percent",
    names_to = "Variable"
  ) %>%
  # Replace column names with the labels shown in the legend; anything that
  # is not one of the two population series is the legislator share.
  mutate(Variable = case_when(
    Variable == "PctAsian" ~ "Asian Population % \n (Contemporaneous)",
    Variable == "PctAsianLag2" ~ "Asian Population % \n (Two Decades Prior)",
    TRUE ~ "Asian State Legislator % \n (Contemporaneous)"
  ))

# Create graph: one panel per state, with series distinguished by point
# shape and line type so the figure stays readable in black and white.
ggplot(
  graphing_data,
  aes(x = Year, y = Percent, shape = Variable, linetype = Variable)
) +
  geom_point() +
  geom_line() +
  facet_wrap(~State) +
  # Observations outside 1980-2010 are dropped from the plot by these limits.
  scale_x_continuous(limits = c(1980, 2010)) +
  # scale = 1: values are printed as-is with a percent sign appended.
  scale_y_continuous(labels = percent_format(scale = 1)) +
  labs(x = NULL, y = NULL, linetype = NULL, shape = NULL) +
  theme_bw() +
  theme(
    legend.position = "bottom"
  )
